This page presents visualization of the residual oil project.
The data comes from NYCCAS. I am comparing data in 2011, which is the year before implementation of the Clean Heat Program, and year 2016, which is the year by which all heating oil #6 should be converted.
The pollutants of interest are PM2.5 and SO2. Here I imported the data and plotted the spatial distribution in 2011 and 2016 and the changes for these two pollutants.
# pm in 2011
pm_12_plot <- ggplot(tract_plot) +
aes(long, lat, group = group, fill=cut_number(m__2012, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Blues") +
labs(
title = 'PM2.5, 2011, ug/m3',
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none')
# pm in 2016
pm_16_plot <- ggplot(tract_plot) +
aes(long, lat, group=group, fill=cut_number(m__2016, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Blues") +
labs(
title = str_c('PM2.5, 2016, ug/m3'),
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none')
# reduction in pm
pm_diff_plot <- ggplot(tract_plot) +
aes(long, lat, group=group, fill=cut_number(pm_diff, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Blues") +
labs(
title = str_c('PM2.5 (2011-2016),ug/m3'),
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none')
# relative difference
pm_rdiff_plot <- ggplot(tract_plot) +
aes(long, lat, group=group, fill=cut_number(pm_rdff, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Blues") +
labs(
title = str_c('PM2.5 (2011-2016), relative'),
x = '',
y = '',
caption = 'Data source: NYCCAS'
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none')
# combine
(pm_12_plot + pm_16_plot)/(pm_diff_plot + pm_rdiff_plot)
popup_pm <- str_c('<b>GEOID: </b>', nyc_tract$geoid, '<br>',
'<b>PM2.5 change: </b>', round(nyc_tract$PM2.5_change, digits = 2))
pal_pm <- colorQuantile('Greens', NULL, n = 5)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = nyc_tract,
fillColor = ~pal_pm(PM2.5_change),
color = 'grey',
weight = 1,
fillOpacity = 0.8,
popup = popup_pm,
highlight = highlightOptions(
weight = 4,
color = "#666",
fillOpacity = 0.3,
bringToFront = TRUE)) %>%
addLegend(data = nyc_tract,
pal = pal_pm,
values = ~nyc_tract$PM2.5_change, opacity = 0.7, title = NULL,
position = "bottomright")
I made a similar plot to show the changes in SO2 from 2012 to 2016. In fact, reduction.
popup_so2 <- str_c('<b>GEOID: </b>', nyc_tract$geoid, '<br>',
'<b>SO2 change: </b>', round(nyc_tract$SO2_change, digits = 2))
pal_so2 <- colorQuantile('Blues', NULL, n = 5)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = nyc_tract,
fillColor = ~pal_so2(SO2_change),
color = 'grey',
weight = 1,
fillOpacity = 0.8,
popup = popup_so2,
highlight = highlightOptions(
weight = 4,
color = "#666",
fillOpacity = 0.3,
bringToFront = TRUE)) %>%
addLegend(data = nyc_tract,
pal = pal_so2,
values = ~nyc_tract$SO2_change, opacity = 0.7, title = NULL,
position = "bottomright")
These plots show that:
# pm in 2011
so2_12_plot <- ggplot(tract_plot) +
aes(long, lat, group = group, fill=cut_number(m_2_2012, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Greens") +
labs(
title = 'SO2, 2011',
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
# pm in 2016
so2_16_plot <- ggplot(tract_plot) +
aes(long, lat, group=group, fill=cut_number(m_2_2016, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Greens") +
labs(
title = 'SO2, 2016',
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
so2_diff_plot <- ggplot(tract_plot) +
aes(long, lat, group = group, fill=cut_number(so2_dff, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Greens") +
labs(
title = 'Reduction in SO2, 2011-2016',
x = '',
y = ''
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
# reduction in pms
so2_rdiff_plot <- ggplot(tract_plot) +
aes(long, lat, group=group, fill=cut_number(s2_rdff, 5)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette="Greens") +
labs(
title = 'Reduction in SO2, relative',
x = '',
y = '',
caption = 'Data source: NYCCAS'
) +
north(tract_plot) +
theme_bw() + theme(legend.position = 'none') +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
# combine
(so2_12_plot + so2_16_plot)/(so2_diff_plot + so2_rdiff_plot)
ro2_breaks <- getJenksBreaks(nyc_tract@data$d_ro2, 5)
ro2_change <- ggplot(tract_plot) +
aes(long, lat, group = group, fill=cut(d_ro2, breaks = c(-6, -1, 0, 1, 6, 40, 106, 159), include.lowest = TRUE)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
labs(
title = 'Changes in fuel oil #2'
) +
north(tract_plot) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
ro4_breaks <- getJenksBreaks(nyc_tract@data$d_ro4, 5)
ro4_change <- ggplot(tract_plot) +
aes(long, lat, group = group, fill=cut(d_ro2, breaks = c(-31,-1, 0, 14, 46, 137, 305), include.lowest = TRUE)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
labs(
title = 'Changes in fuel oil #4'
) +
north(tract_plot) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
ro6_breaks <- getJenksBreaks(nyc_tract@data$d_ro6, 5)
ro6_change <- ggplot(tract_plot) +
aes(long, lat, group = group, fill = cut(d_ro6, breaks = c(-69, -1, 0, 6, 47, 130, 248), include.lowest = TRUE)) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
labs(
title = 'Changes in fuel oil #6'a
) +
north(tract_plot) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
ng_breaks <- getJenksBreaks(nyc_tract@data$d_ng, 5)
ng_change <- ggplot(tract_plot) +
aes(long, lat, group = group, fill = cut(d_ng, breaks = c(-482, -13, -1, 0, 138))) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
labs(
title = 'Changes in natural gas'
) +
north(tract_plot) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
To make things easier for my analysis, I will use the subsetted dataset (2158 obs).
model_tract_sp <- st_read(dsn = '/Users/zhanglvou/Desktop/GoMailman/research/number_boilers/fuel_data/data/so2_abs_lag', layer = 'model_so2_abs_lag') %>%
filter(!is.na(so2_dff) & !is.na(s2_rdff) & !is.na(avg_yer)) %>%
as_Spatial(.)
## Reading layer `model_so2_abs_lag' from data source `/Users/zhanglvou/Desktop/GoMailman/research/number_boilers/fuel_data/data/so2_abs_lag' using driver `ESRI Shapefile'
## Simple feature collection with 2166 features and 41 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -74.25909 ymin: 40.4774 xmax: -73.70001 ymax: 40.91758
## epsg (SRID): NA
## proj4string: +proj=longlat +ellps=GRS80 +no_defs
SO2
model_tract_data <- model_tract_sp@data
lm_so2 <- lm(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_data)
check the coefficients of the linear regression model
lm_so2 %>%
broom::tidy()
## # A tibble: 11 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 7.37e+1 5.20 14.2 1.21e-43
## 2 d_ro2 -5.96e-3 0.0103 -0.579 5.63e- 1
## 3 d_ro4 -3.06e-2 0.00649 -4.72 2.50e- 6
## 4 d_ro6 6.18e-2 0.00641 9.65 1.37e-21
## 5 d_ng -9.22e-3 0.00275 -3.36 7.95e- 4
## 6 d_d2 1.80e-2 0.0814 0.221 8.25e- 1
## 7 bus 9.22e-6 0.00000136 6.79 1.49e-11
## 8 hvytrk 3.74e-6 0.000000921 4.06 5.00e- 5
## 9 medtrk -2.13e-6 0.000000799 -2.66 7.76e- 3
## 10 car -5.21e-8 0.0000000119 -4.38 1.24e- 5
## 11 avg_yer -3.60e-2 0.00269 -13.4 2.61e-39
lm_so2 %>%
broom::glance()
## # A tibble: 1 x 11
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.177 0.173 1.97 46.2 9.36e-84 11 -4518. 9061. 9129.
## # … with 2 more variables: deviance <dbl>, df.residual <int>
R2 is 0.177.
plot the residuals. I expect them to be highly correlated.
model_tract_sp@data <- add_residuals(model_tract_sp@data, lm_so2)
model_tract_sp@data$id <- rownames(model_tract_sp@data)
gpclibPermit()
## [1] TRUE
model_tract_sp.fort <- broom::tidy(model_tract_sp, region = "id")
model_tract_sp_plot <- plyr::join(model_tract_sp.fort, model_tract_sp@data, by = "id")
ggplot(model_tract_sp_plot) +
aes(long, lat, group = group, fill = cut(model_tract_sp_plot$resid, breaks = c(-11, -1.58, -1.01, 0, 0.945, 9))) +
geom_polygon() +
coord_equal() +
scale_fill_brewer(name="Quantile", palette = "RdBu", direction = -1) +
labs(
title = 'Changes in natural gas'
) +
north(model_tract_sp_plot) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)

It’s very clustered
To run the durbin model, I need to exclude some missing rows in the dataset
neighbor <- poly2nb(model_tract_sp)
list_weight <- nb2listw(neighbor)
durbin_so2 <- lagsarlm(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2, data = model_tract_sp, Durbin = TRUE, list_weight)
Some neighbors and list of weights
model_tract_sp@data$d_so2_resid <- residuals(durbin_so2)
popup_dso2_resid <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
'<b>Residual of spatial durbin: </b>', round(model_tract_sp$d_so2_resid, digits = 2))
pal_dso2_resid <- colorQuantile('RdBu', NULL, n = 5, reverse = TRUE)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = model_tract_sp,
fillColor = ~pal_dso2_resid(model_tract_sp$d_so2_resid),
color = 'grey',
weight = 1,
fillOpacity = 0.8,
popup = popup_dso2_resid,
highlight = highlightOptions(
weight = 4,
color = "#666",
fillOpacity = 0.3,
bringToFront = TRUE))
moran_I <- function(x, y = NULL, W){
if(is.null(y)) y = x
xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
yp <- (y - mean(y, na.rm=T))/sd(y, na.rm=T)
W[which(is.na(W))] <- 0
n <- nrow(W)
global <- (xp%*%W%*%yp)/(n - 1)
local <- (xp*W%*%yp)
list(global = global, local = as.numeric(local))
}
simula_moran <- function(x, y = NULL, W, nsims = 1000){
if(is.null(y)) y = x
n = nrow(W)
IDs = 1:n
xp <- (x - mean(x, na.rm=T))/sd(x, na.rm=T)
W[which(is.na(W))] <- 0
global_sims = NULL
local_sims = matrix(NA, nrow = n, ncol=nsims)
ID_sample = sample(IDs, size = n*nsims, replace = T)
y_s = y[ID_sample]
y_s = matrix(y_s, nrow = n, ncol = nsims)
y_s <- (y_s - apply(y_s, 1, mean))/apply(y_s, 1, sd)
global_sims <- as.numeric( (xp%*%W%*%y_s)/(n - 1) )
local_sims <- (xp*W%*%y_s)
list(global_sims = global_sims,
local_sims = local_sims)
}
bw_so2 <- gwr.sel(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_sp, gweight = gwr.Gauss, verbose = TRUE)
## Bandwidth: 24.79522 CV score: 9253.755
## Bandwidth: 40.07945 CV score: 10095.97
## Bandwidth: 15.34904 CV score: 7388.562
## Bandwidth: 9.510986 CV score: 4744.764
## Bandwidth: 5.902868 CV score: 3202.626
## Bandwidth: 3.672928 CV score: 2749.705
## Bandwidth: 2.144625 CV score: 2565.494
## Bandwidth: 1.350206 CV score: 2416.616
## Bandwidth: 0.8592285 CV score: 2426.233
## Bandwidth: 1.16553 CV score: 2389.564
## Bandwidth: 1.122779 CV score: 2389.252
## Bandwidth: 1.138186 CV score: 2388.732
## Bandwidth: 1.141723 CV score: 2388.717
## Bandwidth: 1.141229 CV score: 2388.717
## Bandwidth: 1.14127 CV score: 2388.717
## Bandwidth: 1.141311 CV score: 2388.717
## Bandwidth: 1.14127 CV score: 2388.717
gwr_so2 <- gwr(so2_dff ~ d_ro2 + d_ro4 + d_ro6 + d_ng + d_d2 + bus + hvytrk + medtrk + car + avg_yer, data = model_tract_sp, bandwidth = bw_so2, gweight = gwr.Gauss)
model_tract_sp@data$gwr_r2 <- gwr_so2$SDF$localR2
model_tract_sp@data$ro2_coeff <- gwr_so2$SDF$d_ro2
model_tract_sp@data$ro4_coeff <- gwr_so2$SDF$d_ro4
model_tract_sp@data$ng_coeff <- gwr_so2$SDF$d_ng
model_tract_sp@data$d2_coeff <- gwr_so2$SDF$d_d2
model_tract_sp@data$ro4_coeff <- gwr_so2$SDF$d_ro4
model_tract_sp@data$gwr_residual <- gwr_so2$SDF$gwr.e
popup_localr2 <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
'<b>Local R2: </b>', round(gwr_so2$SDF$localR2, digits = 2))
pal <- colorQuantile('Greens', NULL, n = 5)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = gwr_so2$SDF,
fillColor = ~pal(gwr_so2$SDF$localR2),
color = 'grey',
weight = 1,
fillOpacity = 0.8,
popup = popup_localr2,
highlight = highlightOptions(
weight = 4,
color = "#666",
fillOpacity = 0.3,
bringToFront = TRUE))
popup_ro6 <- str_c('<b>GEOID: </b>', model_tract_sp$geoid, '<br>',
'<b>Coefficient of #RO6 : </b>', round(gwr_so2$SDF$d_ro6, digits = 2))
bins <- getJenksBreaks(gwr_so2$SDF$d_ro6, 6)
pal_ro6 <- colorBin("RdYlBu", domain = gwr_so2$SDF$d_ro6, bins = c(-0.39763151, -0.04608157, 0, 0.12670816, 0.21932648 , 0.47350907), reverse = TRUE)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = gwr_so2$SDF,
fillColor = ~pal_ro6(gwr_so2$SDF$d_ro6),
color = 'grey',
weight = 1,
fillOpacity = 0.8,
popup = popup_ro6,
highlight = highlightOptions(
weight = 4,
color = "#666",
fillOpacity = 0.3,
bringToFront = TRUE)) %>%
addLegend(pal = pal_ro6, values = gwr_so2$SDF$d_ro6, opacity = 0.7, title = NULL,
position = "bottomright")
## Warning in pal_ro6(gwr_so2$SDF$d_ro6): Some values were outside the color
## scale and will be treated as NA